# encoding=utf-8
import re
import datetime
import time
import dateparser
from lxml import etree
from scrapy.selector import Selector
from scrapy.http import Request
from ..items import Rule2Item
from scrapy.spiders import CrawlSpider
from scrapy.conf import settings
import sys

#reload(sys)

#sys.setdefaultencoding('utf-8')

#class Spider(RedisSpider):
class Spider(CrawlSpider):
    """Crawl the SZSE 'main_wxhj' paginated listing and yield one Rule2Item per table row.

    Each row carries a company id/name, a date, a type, a PDF filename hidden
    inside an ``onclick="...encodeURIComponent('<file>')..."`` attribute, and a
    description link text. ``file_urls`` is populated so Scrapy's files
    pipeline can download the PDF.
    """

    name = "rule2"
    # Build the 42 paginated listing URLs (tab1PAGENO=1..42) at class
    # definition time; total record count / page count are fixed by the site.
    start_urls = [
        "http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=main_wxhj&TABKEY=tab1&tab1PAGENO=%s&tab1PAGECOUNT=43&tab1RECORDCOUNT=843&REPORT_ACTION=navigate" % i
        for i in range(1, 43)
    ]

    # Pattern for the quoted filename inside the onclick handler, e.g.
    # "...encodeURIComponent('1234.pdf')..." -> captures "1234.pdf".
    _PDF_RE = re.compile(r"encodeURIComponent\('([^']*)'\)")

    def start_requests(self):
        """Emit one Request per listing page, all handled by :meth:`parse`."""
        for url in self.start_urls:
            yield Request(url=url, callback=self.parse)

    def parse(self, response):
        """Parse a listing page's result table; yield a Rule2Item per row.

        :param response: the listing-page response.
        :returns: generator of populated ``Rule2Item`` instances.
        """
        rows = Selector(response).xpath('//*[@id="REPORTID_tab1"]/tr')
        for row in rows:
            # BUG FIX: a fresh item must be created for every row. The
            # original reused one mutable item instance across yields, so
            # consumers could observe every row overwritten by the last one.
            item = Rule2Item()
            item["com_id"] = row.xpath('td[1]/text()').extract()
            item["com_name"] = row.xpath('td[2]/text()').extract()
            item["let_date"] = row.xpath('td[3]/text()').extract()
            item["let_type"] = row.xpath('td[4]/text()').extract()

            # Extract the PDF filename directly from the onclick text instead
            # of regex-matching the repr() of a Python list (the original
            # chained two findall calls over str(list), which depended on
            # repr quoting rules).
            onclick = ''.join(row.xpath('td[5]/a/@onclick').extract())
            match = self._PDF_RE.search(onclick)
            pdf_id = match.group(1).strip() if match else ''
            item["pdf_id"] = pdf_id
            item["file_urls"] = ['http://www.szse.cn/UpFiles/fxklwxhj/' + pdf_id]

            item["com_re_con"] = row.xpath('td[6]/a/text()').extract()
            yield item
